{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "164d758d-1809-43e4-a8a6-f3a859d04317",
   "metadata": {},
   "source": [
    "# Import packages and credentials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "22271dd3-2843-4f2b-ab47-91c980fe68a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import google.generativeai as genai\n",
    "import google.ai.generativelanguage as glm\n",
    "from elasticsearch import Elasticsearch, helpers\n",
    "from langchain.vectorstores import ElasticsearchStore\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
    "from langchain.prompts import ChatPromptTemplate\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.schema.output_parser import StrOutputParser\n",
    "from langchain.schema.runnable import RunnablePassthrough\n",
    "from langchain.schema.runnable import RunnableLambda\n",
    "from langchain.schema import HumanMessage\n",
    "from urllib.request import urlopen\n",
    "from dotenv import load_dotenv\n",
    "import json, os"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f6cc8c8-d656-4bed-b92b-01f9e7ede8b5",
   "metadata": {},
   "source": [
    "# Get variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "acccfe92-fed1-4e0d-bb77-f1bfd8643b46",
   "metadata": {},
   "outputs": [],
   "source": [
    "load_dotenv()\n",
    "\n",
    "GOOGLE_API_KEY = os.getenv(\"GOOGLE_API_KEY\")\n",
    "ES_USER = os.getenv(\"ES_USER\")\n",
    "ES_PASSWORD = os.getenv(\"ES_PASSWORD\")\n",
    "elastic_index_name='gemini-qa'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "15e5bdf6-141c-4b48-95c9-bc2928b4bb4c",
   "metadata": {},
   "source": [
    "#  Add documents"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "edc7e27f-a959-4103-8f90-68e4a3680a71",
   "metadata": {},
   "source": [
    "## Download the sample dataset and deserialize the document."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c61cd824-4313-41f0-876e-778435b74f19",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Successfully loaded 15 documents\n"
     ]
    }
   ],
   "source": [
    "# Load data into a JSON object\n",
    "with open('./datasets/data.json') as f:\n",
    "   workplace_docs = json.load(f)\n",
    "\n",
    "print(f\"Successfully loaded {len(workplace_docs)} documents\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1c834967-eae0-4d1d-8053-0e3d0040111c",
   "metadata": {},
   "source": [
    "## Split Documents into Passages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "7c9137b4-cf66-48ae-b89b-2a86183f074b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Created a chunk of size 245, which is longer than the specified 50\n",
      "Created a chunk of size 288, which is longer than the specified 50\n",
      "Created a chunk of size 204, which is longer than the specified 50\n",
      "Created a chunk of size 281, which is longer than the specified 50\n",
      "Created a chunk of size 249, which is longer than the specified 50\n",
      "Created a chunk of size 285, which is longer than the specified 50\n",
      "Created a chunk of size 298, which is longer than the specified 50\n",
      "Created a chunk of size 270, which is longer than the specified 50\n",
      "Created a chunk of size 224, which is longer than the specified 50\n",
      "Created a chunk of size 288, which is longer than the specified 50\n",
      "Created a chunk of size 260, which is longer than the specified 50\n",
      "Created a chunk of size 199, which is longer than the specified 50\n",
      "Created a chunk of size 290, which is longer than the specified 50\n",
      "Created a chunk of size 251, which is longer than the specified 50\n",
      "Created a chunk of size 195, which is longer than the specified 50\n",
      "Created a chunk of size 242, which is longer than the specified 50\n",
      "Created a chunk of size 275, which is longer than the specified 50\n",
      "Created a chunk of size 277, which is longer than the specified 50\n",
      "Created a chunk of size 284, which is longer than the specified 50\n",
      "Created a chunk of size 285, which is longer than the specified 50\n",
      "Created a chunk of size 324, which is longer than the specified 50\n",
      "Created a chunk of size 328, which is longer than the specified 50\n",
      "Created a chunk of size 280, which is longer than the specified 50\n",
      "Created a chunk of size 123, which is longer than the specified 50\n",
      "Created a chunk of size 221, which is longer than the specified 50\n",
      "Created a chunk of size 330, which is longer than the specified 50\n",
      "Created a chunk of size 292, which is longer than the specified 50\n",
      "Created a chunk of size 240, which is longer than the specified 50\n",
      "Created a chunk of size 232, which is longer than the specified 50\n",
      "Created a chunk of size 324, which is longer than the specified 50\n",
      "Created a chunk of size 167, which is longer than the specified 50\n",
      "Created a chunk of size 181, which is longer than the specified 50\n",
      "Created a chunk of size 353, which is longer than the specified 50\n",
      "Created a chunk of size 471, which is longer than the specified 50\n",
      "Created a chunk of size 400, which is longer than the specified 50\n",
      "Created a chunk of size 607, which is longer than the specified 50\n",
      "Created a chunk of size 440, which is longer than the specified 50\n",
      "Created a chunk of size 788, which is longer than the specified 50\n",
      "Created a chunk of size 547, which is longer than the specified 50\n",
      "Created a chunk of size 67, which is longer than the specified 50\n",
      "Created a chunk of size 635, which is longer than the specified 50\n",
      "Created a chunk of size 440, which is longer than the specified 50\n",
      "Created a chunk of size 464, which is longer than the specified 50\n",
      "Created a chunk of size 372, which is longer than the specified 50\n",
      "Created a chunk of size 866, which is longer than the specified 50\n",
      "Created a chunk of size 111, which is longer than the specified 50\n",
      "Created a chunk of size 361, which is longer than the specified 50\n",
      "Created a chunk of size 110, which is longer than the specified 50\n",
      "Created a chunk of size 471, which is longer than the specified 50\n",
      "Created a chunk of size 288, which is longer than the specified 50\n",
      "Created a chunk of size 150, which is longer than the specified 50\n",
      "Created a chunk of size 62, which is longer than the specified 50\n",
      "Created a chunk of size 619, which is longer than the specified 50\n",
      "Created a chunk of size 257, which is longer than the specified 50\n",
      "Created a chunk of size 310, which is longer than the specified 50\n",
      "Created a chunk of size 310, which is longer than the specified 50\n",
      "Created a chunk of size 282, which is longer than the specified 50\n",
      "Created a chunk of size 230, which is longer than the specified 50\n",
      "Created a chunk of size 213, which is longer than the specified 50\n",
      "Created a chunk of size 395, which is longer than the specified 50\n",
      "Created a chunk of size 270, which is longer than the specified 50\n",
      "Created a chunk of size 224, which is longer than the specified 50\n",
      "Created a chunk of size 292, which is longer than the specified 50\n",
      "Created a chunk of size 231, which is longer than the specified 50\n",
      "Created a chunk of size 153, which is longer than the specified 50\n",
      "Created a chunk of size 277, which is longer than the specified 50\n",
      "Created a chunk of size 171, which is longer than the specified 50\n",
      "Created a chunk of size 106, which is longer than the specified 50\n",
      "Created a chunk of size 253, which is longer than the specified 50\n",
      "Created a chunk of size 140, which is longer than the specified 50\n",
      "Created a chunk of size 323, which is longer than the specified 50\n",
      "Created a chunk of size 95, which is longer than the specified 50\n",
      "Created a chunk of size 201, which is longer than the specified 50\n",
      "Created a chunk of size 174, which is longer than the specified 50\n",
      "Created a chunk of size 232, which is longer than the specified 50\n",
      "Created a chunk of size 238, which is longer than the specified 50\n",
      "Created a chunk of size 193, which is longer than the specified 50\n",
      "Created a chunk of size 305, which is longer than the specified 50\n",
      "Created a chunk of size 1120, which is longer than the specified 50\n",
      "Created a chunk of size 397, which is longer than the specified 50\n",
      "Created a chunk of size 320, which is longer than the specified 50\n",
      "Created a chunk of size 567, which is longer than the specified 50\n",
      "Created a chunk of size 300, which is longer than the specified 50\n",
      "Created a chunk of size 217, which is longer than the specified 50\n",
      "Created a chunk of size 271, which is longer than the specified 50\n",
      "Created a chunk of size 264, which is longer than the specified 50\n",
      "Created a chunk of size 340, which is longer than the specified 50\n",
      "Created a chunk of size 383, which is longer than the specified 50\n",
      "Created a chunk of size 243, which is longer than the specified 50\n",
      "Created a chunk of size 344, which is longer than the specified 50\n",
      "Created a chunk of size 225, which is longer than the specified 50\n",
      "Created a chunk of size 208, which is longer than the specified 50\n",
      "Created a chunk of size 420, which is longer than the specified 50\n",
      "Created a chunk of size 89, which is longer than the specified 50\n",
      "Created a chunk of size 94, which is longer than the specified 50\n",
      "Created a chunk of size 299, which is longer than the specified 50\n",
      "Created a chunk of size 310, which is longer than the specified 50\n"
     ]
    }
   ],
   "source": [
    "metadata = []\n",
    "content = []\n",
    "\n",
    "for doc in workplace_docs:\n",
    "  content.append(doc[\"content\"])\n",
    "  metadata.append({\n",
    "      \"name\": doc[\"name\"],\n",
    "      \"summary\": doc[\"summary\"],\n",
    "      \"rolePermissions\":doc[\"rolePermissions\"]\n",
    "  })\n",
    "\n",
    "text_splitter = CharacterTextSplitter(chunk_size=50, chunk_overlap=0)\n",
    "docs = text_splitter.create_documents(content, metadatas=metadata)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7886d0df-c810-4378-ac51-87dcf30e3efe",
   "metadata": {},
   "source": [
    "# Index Documents into Elasticsearch using Gemini Embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "d850cdd8-16f1-4108-8f56-8f8041bb5405",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'p4U3azj3TbSz7Fm_9W3Ylg', 'version': {'number': '8.12.0', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': '1665f706fd9354802c02146c1e6b5c0fbcddfbc9', 'build_date': '2024-01-11T10:05:27.953830042Z', 'build_snapshot': False, 'lucene_version': '9.9.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n"
     ]
    }
   ],
   "source": [
    "url = f\"https://{ES_USER}:{ES_PASSWORD}@192.168.0.3:9200\"\n",
    "\n",
    "connection = Elasticsearch(\n",
    "        hosts=[url], \n",
    "        ca_certs = \"./http_ca.crt\", \n",
    "        verify_certs = True\n",
    ")\n",
    "print(connection.info())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "fae4c8d9-2a10-406b-a0ab-a39c1a40db5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings = GoogleGenerativeAIEmbeddings(\n",
    "    model=\"models/embedding-001\", task_type=\"retrieval_document\"\n",
    ")\n",
    "\n",
    "es = ElasticsearchStore.from_documents( \n",
    "                            docs,\n",
    "                            embedding = embeddings, \n",
    "                            es_url = url, \n",
    "                            es_connection = connection,\n",
    "                            index_name = elastic_index_name, \n",
    "                            es_user = ES_USER,\n",
    "                            es_password = ES_PASSWORD)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "172cb91b-b63b-4f64-a1ed-84881b7fd293",
   "metadata": {},
   "source": [
    "# Create a retriever using Elasticsearch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "0303fd89-25fe-4e83-a96b-7b7e5f00e106",
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings = GoogleGenerativeAIEmbeddings(\n",
    "    model=\"models/embedding-001\", task_type=\"retrieval_query\"\n",
    ")\n",
    "\n",
    "retriever = es.as_retriever(search_kwargs={\"k\": 3})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "979b1926-9543-4cf9-8ade-a00206da439c",
   "metadata": {},
   "source": [
    "# Fromat Docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "1499bb91-cfeb-4f77-9b93-14cff3dc09a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_docs(docs):\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7047c08-7c09-40ce-acb2-7d321bc99fce",
   "metadata": {},
   "source": [
    "# Create a Chain using Prompt Template + gemini-pro model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "3a6a85ff-360c-4b41-81c8-cfd308faa411",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'- Increase revenue by 20% compared to fiscal year 2023.\\n- Expand market share in key segments by 15%.\\n- Retain 95% of existing customers and increase customer satisfaction ratings.\\n- Launch at least two new products or services in high-demand market segments.'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "template = \"\"\"Answer the question based only on the following context:\\n\n",
    "\n",
    "{context}\n",
    "\n",
    "Question: {question}\n",
    "\"\"\"\n",
    "prompt = ChatPromptTemplate.from_template(template)\n",
    "\n",
    "\n",
    "chain = (\n",
    "    {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()} \n",
    "    | prompt \n",
    "    | ChatGoogleGenerativeAI(model=\"gemini-pro\", temperature=0.7) \n",
    "    | StrOutputParser()\n",
    ")\n",
    "\n",
    "chain.invoke(\"what is our sales goals?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee91b48b-b951-4183-b9d1-c48b7458ab6a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python_3.11",
   "language": "python",
   "name": "python_3.11"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
